import json
import traceback

from selenium import webdriver
import os
import time
import re
from bs4 import BeautifulSoup
import random
import requests

import mysql_connect


# header = {
#     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
#     'Accept-Encoding': 'gzip, deflate, sdch',
#     'Accept-Language': 'zh-CN,zh;q=0.8',
#     'Connection': 'keep-alive',
#     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235',
#     "Cookie": """_zap=aa73886a-e368-4332-a732-67e854600ad1; d_c0=AGCYZtHCpxaPTjCAaTP9VRiKH8L0bqJfs78=|1681995372; YD00517437729195%3AWM_TID=YJHQ4pfKYCFERQEFQRc7%2FURz245Mnc74; __snaker__id=m99RKmKSTHtHwb4Y; YD00517437729195%3AWM_NI=oLRPwWyh8FIqo2ONE8hwpD%2Bk98nvFNska4yJg4j0VH1kPcYSIB%2FgRvSQJTgbxRlP3PedGeVRRSOokk0ok3CX8MCJ8JtbBYLsuaNbgbD63ySLiZqh94wtGaxowp%2BZtH8%2BUUQ%3D; YD00517437729195%3AWM_NIKE=9ca17ae2e6ffcda170e2e6eeb6d121b187a691ca70ae928bb3d55a829e9eadd5648c8dfea6ea6188bfbcaed52af0fea7c3b92aad949eb1f65dbbb8f8a7cc7badee81afc834aca9c0a5aa61fbe8a297c546b1b5afd5fc42a5eca6d1d544f3b5a084dc598b9ee188d17e918f86b5e66abc8ea4b1b25ca7bc8986d55a928d8190ea61f4b1a5d4c254fc938dabbb42b8be8ebacf79b7bf8283e66a88eafd9ad248ab86bf89bc40b5ac8982fb65b5b0e5bbf744fcf59bb6d437e2a3; q_c1=d333a471c0e84dffa5befaae67831722|1684920221000|1684920221000; q_c1=d333a471c0e84dffa5befaae67831722|1692624665000|1684920221000; __utmv=51854390.100--|2=registration_date=20160703=1^3=entry_date=20160703=1; _xsrf=b52003193babcd2f3a19472296f30a04; z_c0=2|1:0|10:1693130116|4:z_c0|80:MS4xQWZRMEF3QUFBQUFtQUFBQVlBSlZUWU5yMkdYWjNhOWdUV0hzRm5SYTVodDlPS3pPMUkxMFBBPT0=|d845d5db9929b06cc93d9081676c65136a44d6861d1ec002e5877b8764739b05; __utma=51854390.1016281673.1692624666.1693312041.1693460941.4; __utmz=51854390.1693460941.4.4.utmcsr=cn.bing.com|utmccn=(referral)|utmcmd=referral|utmcct=/; tst=r; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1693719529,1693914623,1693998630,1694083538; SESSIONID=yO2NsNVSvxgHOmegcfCLxOdKojDZWIn18DwpLpc9QWs; JOID=UVsdAkM4Wv6XISC5SDrxZK9VoPNXQR7F0Flu5BJFIpLWZ0bcJOLxB_EqK7tIBULwbh9lO0ueWkbl1LKEeYiP-qo=; osd=V1kVBUk-WPaQKya7QD37Yq1dp_lRQxbC2l9s7BVPJJDeYEzaJur2DfcoI7xCA0D4aRVjOUOZUEDn3LWOf4qH_aA=; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1694083541; KLBRSID=b33d76655747159914ef8c32323d16fd|1694083557|1694083534"""
# }


def get_data_test():
    """Fetch one page of root comments for a sample article and print it.

    Manual smoke test for the comment API: sends a single GET request for
    article 19575605 and pretty-prints the decoded JSON body to stdout.
    The HTTP status is deliberately not checked, so a JSON error payload
    is printed as-is, which is useful when debugging the request headers.

    Raises:
        requests.RequestException: On connection failure or timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # For reference, the answer-comment endpoint (unused here) looks like:
    # https://www.zhihu.com/api/v4/comment_v5/answers/12202037/root_comment?order_by=score&limit=20&offset=
    essay_url = "https://www.zhihu.com/api/v4/articles/19575605/root_comments?order=normal&limit=20&offset=20&status=open"

    # NOTE(review): the Cookie below is a hard-coded logged-in session.
    # It will expire and should not live in source control; consider
    # loading it from configuration or an environment variable.
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235',
        "Cookie": """_zap=aa73886a-e368-4332-a732-67e854600ad1; d_c0=AGCYZtHCpxaPTjCAaTP9VRiKH8L0bqJfs78=|1681995372; YD00517437729195%3AWM_TID=YJHQ4pfKYCFERQEFQRc7%2FURz245Mnc74; __snaker__id=m99RKmKSTHtHwb4Y; YD00517437729195%3AWM_NI=oLRPwWyh8FIqo2ONE8hwpD%2Bk98nvFNska4yJg4j0VH1kPcYSIB%2FgRvSQJTgbxRlP3PedGeVRRSOokk0ok3CX8MCJ8JtbBYLsuaNbgbD63ySLiZqh94wtGaxowp%2BZtH8%2BUUQ%3D; YD00517437729195%3AWM_NIKE=9ca17ae2e6ffcda170e2e6eeb6d121b187a691ca70ae928bb3d55a829e9eadd5648c8dfea6ea6188bfbcaed52af0fea7c3b92aad949eb1f65dbbb8f8a7cc7badee81afc834aca9c0a5aa61fbe8a297c546b1b5afd5fc42a5eca6d1d544f3b5a084dc598b9ee188d17e918f86b5e66abc8ea4b1b25ca7bc8986d55a928d8190ea61f4b1a5d4c254fc938dabbb42b8be8ebacf79b7bf8283e66a88eafd9ad248ab86bf89bc40b5ac8982fb65b5b0e5bbf744fcf59bb6d437e2a3; q_c1=d333a471c0e84dffa5befaae67831722|1684920221000|1684920221000; q_c1=d333a471c0e84dffa5befaae67831722|1692624665000|1684920221000; __utmv=51854390.100--|2=registration_date=20160703=1^3=entry_date=20160703=1; _xsrf=b52003193babcd2f3a19472296f30a04; z_c0=2|1:0|10:1693130116|4:z_c0|80:MS4xQWZRMEF3QUFBQUFtQUFBQVlBSlZUWU5yMkdYWjNhOWdUV0hzRm5SYTVodDlPS3pPMUkxMFBBPT0=|d845d5db9929b06cc93d9081676c65136a44d6861d1ec002e5877b8764739b05; __utma=51854390.1016281673.1692624666.1693312041.1693460941.4; __utmz=51854390.1693460941.4.4.utmcsr=cn.bing.com|utmccn=(referral)|utmcmd=referral|utmcct=/; tst=r; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1693719529,1693914623,1693998630,1694083538; SESSIONID=yO2NsNVSvxgHOmegcfCLxOdKojDZWIn18DwpLpc9QWs; JOID=UVsdAkM4Wv6XISC5SDrxZK9VoPNXQR7F0Flu5BJFIpLWZ0bcJOLxB_EqK7tIBULwbh9lO0ueWkbl1LKEeYiP-qo=; osd=V1kVBUk-WPaQKya7QD37Yq1dp_lRQxbC2l9s7BVPJJDeYEzaJur2DfcoI7xCA0D4aRVjOUOZUEDn3LWOf4qH_aA=; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1694083541; KLBRSID=b33d76655747159914ef8c32323d16fd|1694083557|1694083534"""
    }
    # Without a timeout a stalled connection would block this call forever.
    req = requests.get(essay_url, headers=header, timeout=30)
    json_obj = json.loads(req.text)
    # ensure_ascii=False keeps non-ASCII comment text readable in the output.
    formatted_json = json.dumps(json_obj, ensure_ascii=False, indent=4)
    print(formatted_json)


def get_essay_id_list() -> list[int]:
    """Return every ``id`` value stored in the ``shane_answer`` table.

    Obtains a connection and cursor from ``mysql_connect.get_conn()``, runs
    the query, and collects the first column of each returned row.

    A failing query is treated as best-effort: the traceback is printed and
    an empty list is returned. The cursor and the connection are closed in
    every case.

    NOTE(review): the table is named ``shane_answer`` while the caller in
    this file passes these ids to an article endpoint -- confirm that the
    ids really are article ids.

    Returns:
        The ids read from the table, or an empty list if the query failed.
    """
    conn, cursor = mysql_connect.get_conn()
    essay_id_list: list[int] = []
    try:
        sql = """
            select id from shane_answer;
        """
        cursor.execute(sql)
        essay_id_list = [row[0] for row in cursor.fetchall()]
    except Exception:
        # Catch Exception rather than everything, so Ctrl-C and SystemExit
        # still propagate instead of being reported as a query failure.
        traceback.print_exc()
    finally:
        # Close the connection even if closing the cursor itself fails.
        try:
            cursor.close()
        finally:
            conn.close()
    return essay_id_list


def get_json_str_save_in_json_file(essay_id: int) -> None:
    """Download one page of root comments for an article and save it as JSON.

    Requests the article's root-comment endpoint, decodes the JSON body and
    writes it, pretty-printed, to
    ``other_files/comment_jsons/essay_jsons/<essay_id>.json``.

    The file is only written after a successful (2xx) response, so an HTTP
    error never leaves behind a file that would make the id look as if it
    had already been downloaded.

    Args:
        essay_id: Article id substituted into the API URL and used as the
            output file name.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
        OSError: If the output file cannot be written.
    """
    # NOTE(review): offset=20 with limit=20 requests the page starting at the
    # 21st comment, so the first 20 comments are not fetched -- confirm this
    # is intended rather than a leftover from a copied browser request.
    the_url = f"https://www.zhihu.com/api/v4/articles/{essay_id}/root_comments?order=normal&limit=20&offset=20&status=open"
    # NOTE(review): the Cookie below is a hard-coded logged-in session.
    # It will expire and should not live in source control; consider
    # loading it from configuration or an environment variable.
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235',
        "Cookie": """_zap=aa73886a-e368-4332-a732-67e854600ad1; d_c0=AGCYZtHCpxaPTjCAaTP9VRiKH8L0bqJfs78=|1681995372; YD00517437729195%3AWM_TID=YJHQ4pfKYCFERQEFQRc7%2FURz245Mnc74; __snaker__id=m99RKmKSTHtHwb4Y; YD00517437729195%3AWM_NI=oLRPwWyh8FIqo2ONE8hwpD%2Bk98nvFNska4yJg4j0VH1kPcYSIB%2FgRvSQJTgbxRlP3PedGeVRRSOokk0ok3CX8MCJ8JtbBYLsuaNbgbD63ySLiZqh94wtGaxowp%2BZtH8%2BUUQ%3D; YD00517437729195%3AWM_NIKE=9ca17ae2e6ffcda170e2e6eeb6d121b187a691ca70ae928bb3d55a829e9eadd5648c8dfea6ea6188bfbcaed52af0fea7c3b92aad949eb1f65dbbb8f8a7cc7badee81afc834aca9c0a5aa61fbe8a297c546b1b5afd5fc42a5eca6d1d544f3b5a084dc598b9ee188d17e918f86b5e66abc8ea4b1b25ca7bc8986d55a928d8190ea61f4b1a5d4c254fc938dabbb42b8be8ebacf79b7bf8283e66a88eafd9ad248ab86bf89bc40b5ac8982fb65b5b0e5bbf744fcf59bb6d437e2a3; q_c1=d333a471c0e84dffa5befaae67831722|1684920221000|1684920221000; q_c1=d333a471c0e84dffa5befaae67831722|1692624665000|1684920221000; __utmv=51854390.100--|2=registration_date=20160703=1^3=entry_date=20160703=1; _xsrf=b52003193babcd2f3a19472296f30a04; z_c0=2|1:0|10:1693130116|4:z_c0|80:MS4xQWZRMEF3QUFBQUFtQUFBQVlBSlZUWU5yMkdYWjNhOWdUV0hzRm5SYTVodDlPS3pPMUkxMFBBPT0=|d845d5db9929b06cc93d9081676c65136a44d6861d1ec002e5877b8764739b05; __utma=51854390.1016281673.1692624666.1693312041.1693460941.4; __utmz=51854390.1693460941.4.4.utmcsr=cn.bing.com|utmccn=(referral)|utmcmd=referral|utmcct=/; tst=r; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1693719529,1693914623,1693998630,1694083538; SESSIONID=yO2NsNVSvxgHOmegcfCLxOdKojDZWIn18DwpLpc9QWs; JOID=UVsdAkM4Wv6XISC5SDrxZK9VoPNXQR7F0Flu5BJFIpLWZ0bcJOLxB_EqK7tIBULwbh9lO0ueWkbl1LKEeYiP-qo=; osd=V1kVBUk-WPaQKya7QD37Yq1dp_lRQxbC2l9s7BVPJJDeYEzaJur2DfcoI7xCA0D4aRVjOUOZUEDn3LWOf4qH_aA=; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1694083541; KLBRSID=b33d76655747159914ef8c32323d16fd|1694083557|1694083534"""
    }
    # Without a timeout a stalled connection would block the whole run.
    req = requests.get(the_url, headers=header, timeout=30)
    # Refuse to persist error responses: a saved file marks the id as done.
    req.raise_for_status()
    json_obj = json.loads(req.text)
    # ensure_ascii=False keeps non-ASCII comment text readable in the file.
    formatted_json = json.dumps(json_obj, ensure_ascii=False, indent=4)
    # Make sure the target directory exists so the write cannot fail on it.
    os.makedirs("other_files/comment_jsons/essay_jsons", exist_ok=True)
    with open(f"other_files/comment_jsons/essay_jsons/{essay_id}.json", 'w', encoding='utf-8') as w_file:
        w_file.write(formatted_json)


def main() -> None:
    """Download comment JSON for every essay id that is not yet on disk.

    Reads the id list via ``get_essay_id_list()``, skips ids that already
    have a ``<id>.json`` file in ``other_files/comment_jsons/essay_jsons``,
    and calls ``get_json_str_save_in_json_file()`` for the rest. After each
    attempt, successful or not, the loop sleeps for a random 1-3 seconds.

    A failed download is reported (message plus traceback) and the loop
    moves on to the next id. ``KeyboardInterrupt`` is not swallowed, so the
    run can be stopped with Ctrl-C.
    """
    output_dir = "other_files/comment_jsons/essay_jsons"
    essay_id_list: list[int] = get_essay_id_list()

    # A fresh checkout may not have the output directory yet.
    os.makedirs(output_dir, exist_ok=True)

    # Use a set so the per-id membership test below is O(1).
    already_ids: set[int] = set()
    for file_name in os.listdir(output_dir):
        try:
            already_ids.add(int(file_name.split('.')[0]))
        except ValueError:
            # Ignore stray non-numeric entries such as ".DS_Store".
            continue

    total = len(essay_id_list)
    for index, essay_id in enumerate(essay_id_list):
        if essay_id in already_ids:
            continue
        sleep_time_get_url = random.uniform(1, 3)
        # Progress line: position, id, total count and the upcoming sleep.
        print(f"现在是第{index + 1}个，id为{essay_id}，共有{total}个,即将休眠{sleep_time_get_url}秒")
        try:
            get_json_str_save_in_json_file(essay_id)
        except Exception:
            # The printed message means "big failure"; keep going with the
            # next id rather than aborting the whole run.
            print("大失败！！！")
            traceback.print_exc()
        # Throttle after every attempt, whether it succeeded or failed.
        time.sleep(sleep_time_get_url)



# Start the download loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
